package practice.test1.webETL;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * ETL mapper that filters raw web-log lines.
 *
 * <p>Each input line is split on single spaces; lines with more than 11
 * fields are considered well-formed log records and are emitted unchanged
 * (as the key, with a {@link NullWritable} value). Shorter lines are
 * assumed to be malformed and are silently dropped.
 */
public class WebLogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as-is when it passes the field-count check.
     *
     * @param key     byte offset of the line within the input split (unused)
     * @param value   one raw log line
     * @param context Hadoop context used to emit surviving records
     * @throws IOException          if the framework write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String line = value.toString();

        // NOTE: split(" ") treats consecutive spaces as empty fields;
        // assumes the log format uses exactly one space between fields.
        String[] split = line.split(" ");

        // Keep only lines whose field count exceeds 11 (well-formed records).
        // BUG FIX: the original returned true for length <= 11, which kept
        // malformed lines and dropped valid ones — inverted relative to the
        // stated intent ("res is true when length is greater than 11").
        if (parseLog(split)) {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Validates a split log line by field count.
     *
     * @param split the space-separated fields of one log line
     * @return {@code true} if the line has more than 11 fields (valid record)
     */
    private boolean parseLog(String[] split) {
        return split.length > 11;
    }
}
